/********************************************************************************
Discrimination in Multi-Phase Systems: Evidence from Child Protection

Created on: 12/28/2022
Last Modified on: 2/13/2024

Description: This program generates the limited investigator-level analysis sample.
It contains 699 investigators, their placement and subsequent maltreatment rates, 
as well as standard errors for these rates. It also contains investigator covariates.

Note that we have removed the file directory names from this program for 
confidentiality reasons.
********************************************************************************/

**************************
**(0) SETUP
**************************
// Start from a clean session: empty data, no paging of output, no open log.
clear
set more off
// `macro drop all` would only drop a global macro literally named "all";
// `_all` is the keyword that clears the global macros left over from
// earlier runs (system S_* macros are kept by Stata).
macro drop _all
capture log close
// Fixed seed so that any random-number use in this program is reproducible.
set seed 02042023

*Set directories 
// Directory globals are intentionally blank here (paths removed for
// confidentiality); fill them in, with a trailing path separator, before running.
global clean 
global cleand 
global tmpd 
global output 


**************************
**(1) ESTIMATE INVESTIGATOR LEAVE-AT-HOME AND SUBSEQUENT MALTREATMENT RATES 
**************************
use "${cleand}child_investigation_analysis_sample.dta", clear 

// Distinct investigator ids, stored in the local `levels'
levelsof worker_id, local(levels)

// One regression per outcome: fc first, then inv6m
foreach var in fc inv6m {

	// fc is estimated on the full sample; every other outcome only on
	// cases with fc==0 (children left at home)
	if "`var'"=="fc" {
		local insample ""
	}
	else {
		local insample "if fc==0"
	}
	reghdfe `var' i.worker_id i.worker_id#i.pre_black `insample', ///
		absorb(rotationgroup) resid

	// Copy each investigator's own coefficients onto that investigator's rows.
	// capture: a coefficient that cannot be referenced leaves the value missing.
	gen beta_gap_`var' = .
	gen beta_base_`var' = .
	foreach w of local levels {
		// investigator main effect
		cap replace beta_base_`var' = _b[`w'.worker_id] if worker_id == `w'
		// investigator-by-pre_black interaction (the gap)
		cap replace beta_gap_`var' = _b[`w'.worker_id#1.pre_black] if worker_id == `w'
	}

	// Average absorbed rotationgroup effect: d = (xb + d) - xb, averaged by summarize.
	// E[Y|inv,r] = invFE + invFE*black + g*E[X_i]
	predict xbd, xbd
	predict xb, xb
	gen d = xbd - xb
	sum d
	local dbar = `r(mean)'

	// Investigators without a retrievable main effect (the omitted one) get 0
	replace beta_base_`var' = 0 if beta_base_`var' == .

	// Aggregate rates: w_fe_* uses the main effect only, b_fe_* adds the
	// pre_black interaction; both add the constant and the mean absorbed effect.
	gen w_fe_`var' = .
	gen b_fe_`var' = .

	qui {
		foreach w of local levels {

			// worker_id 1 gets no main-effect term in the combination
			// (presumably the omitted base investigator -- confirm)
			local combo "_cons"
			if `w' != 1 {
				local combo "`combo' + _b[`w'.worker_id]"
			}

			// white children
			lincom `combo' + `dbar'
			replace w_fe_`var' = `r(estimate)' if worker_id == `w'

			// black children
			lincom `combo' + _b[`w'.worker_id#1.pre_black] + `dbar'
			replace b_fe_`var' = `r(estimate)' if worker_id == `w'
		}

		// prediction helpers are recreated for the next outcome
		drop xbd xb d
	}
}

*Save investigator-level rates
// Reduce to one row per non-missing investigator id
keep worker_id count_inv bshare beta_* b_* w_*
drop if worker_id == .
duplicates drop worker_id, force
save "${tmpd}inv_rates.dta", replace


**************************
**(3) COMPUTE THE SES OF INVESTIGATOR RATES 
**************************
use "${cleand}child_investigation_analysis_sample.dta", clear

// Work on a copy; the case-level data is restored at the end of this section
preserve

// Distinct investigator ids, stored in the local `levels'
levelsof worker_id, local(levels)

// One regression per outcome, default (unclustered) standard errors
foreach var in fc inv6m {

	// fc uses the full sample; other outcomes only cases with fc==0
	if "`var'"=="fc" {
		local insample ""
	}
	else {
		local insample "if fc==0"
	}
	reghdfe `var' i.worker_id i.worker_id#i.pre_black `insample', ///
		absorb(rotationgroup) resid

	// Coefficient-level standard errors, copied onto each investigator's rows.
	// capture: a coefficient that cannot be referenced leaves the value missing.
	gen se_gap_`var' = .
	gen se_base_`var' = .
	foreach w of local levels {
		cap replace se_base_`var' = _se[`w'.worker_id] if worker_id == `w'
		cap replace se_gap_`var' = _se[`w'.worker_id#1.pre_black] if worker_id == `w'
	}

	// Mean absorbed rotationgroup effect, carried as a constant in lincom
	// E[Y|inv,r] = invFE + invFE*black + g*E[X_i]
	predict xbd, xbd
	predict xb, xb
	gen d = xbd - xb
	sum d
	local dbar = `r(mean)'

	// Standard errors of the aggregate rates (w_: main effect only, b_: plus interaction)
	gen w_se_fe_`var' = .
	gen b_se_fe_`var' = .

	qui {
		foreach w of local levels {

			// worker_id 1 gets no main-effect term in the combination
			// (presumably the omitted base investigator -- confirm)
			local combo "_cons"
			if `w' != 1 {
				local combo "`combo' + _b[`w'.worker_id]"
			}

			// white children
			lincom `combo' + `dbar'
			replace w_se_fe_`var' = `r(se)' if worker_id == `w'

			// black children
			lincom `combo' + _b[`w'.worker_id#1.pre_black] + `dbar'
			replace b_se_fe_`var' = `r(se)' if worker_id == `w'
		}

		drop xbd xb d
	}
}

*Save dataset:
// One row per non-missing investigator id
keep worker_id se_* b_* w_*
drop if worker_id == .
duplicates drop worker_id, force

save "${tmpd}inv_se.dta", replace
restore 



**************************
**(4) SES CLUSTERED AT THE INVESTIGATOR LEVEL 
**************************
// Work on a copy of the case-level data currently in memory
preserve

// Distinct investigator ids, stored in the local `levels'
levelsof worker_id, local(levels)

// One regression per outcome, standard errors clustered on worker_id
foreach var in fc inv6m {

	// fc uses the full sample; other outcomes only cases with fc==0
	if "`var'"=="fc" {
		local insample ""
	}
	else {
		local insample "if fc==0"
	}
	reghdfe `var' i.worker_id i.worker_id#i.pre_black `insample', ///
		absorb(rotationgroup) cluster(worker_id) resid

	// Coefficient-level standard errors, copied onto each investigator's rows.
	// capture: a coefficient that cannot be referenced leaves the value missing.
	gen se_worker_gap_`var' = .
	gen se_worker_base_`var' = .
	foreach w of local levels {
		cap replace se_worker_base_`var' = _se[`w'.worker_id] if worker_id == `w'
		cap replace se_worker_gap_`var' = _se[`w'.worker_id#1.pre_black] if worker_id == `w'
	}

	// Mean absorbed rotationgroup effect, carried as a constant in lincom
	// E[Y|inv,r] = invFE + invFE*black + g*E[X_i]
	predict xbd, xbd
	predict xb, xb
	gen d = xbd - xb
	sum d
	local dbar = `r(mean)'

	// Standard errors of the aggregate rates (w_: main effect only, b_: plus interaction)
	gen w_se_worker_fe_`var' = .
	gen b_se_worker_fe_`var' = .

	qui {
		foreach w of local levels {

			// worker_id 1 gets no main-effect term in the combination
			// (presumably the omitted base investigator -- confirm)
			local combo "_cons"
			if `w' != 1 {
				local combo "`combo' + _b[`w'.worker_id]"
			}

			// white children
			lincom `combo' + `dbar'
			replace w_se_worker_fe_`var' = `r(se)' if worker_id == `w'

			// black children
			lincom `combo' + _b[`w'.worker_id#1.pre_black] + `dbar'
			replace b_se_worker_fe_`var' = `r(se)' if worker_id == `w'
		}

		drop xbd xb d
	}
}
*Save dataset:
// One row per non-missing investigator id
keep worker_id se_* b_* w_*
drop if worker_id == .
duplicates drop worker_id, force

save "${tmpd}inv_se_worker.dta", replace
restore 


**************************
**(5) SES TW CLUSTERED AT THE INVESTIGATOR AND CHILD LEVELS 
**************************
// Work on a copy of the case-level data currently in memory
preserve

// Distinct investigator ids, stored in the local `levels'
levelsof worker_id, local(levels)

// One regression per outcome, two-way clustering on worker_id and vicid
foreach var in fc inv6m {

	// fc uses the full sample; other outcomes only cases with fc==0
	if "`var'"=="fc" {
		local insample ""
	}
	else {
		local insample "if fc==0"
	}
	reghdfe `var' i.worker_id i.worker_id#i.pre_black `insample', ///
		absorb(rotationgroup) cluster(worker_id vicid) resid

	// Coefficient-level standard errors, copied onto each investigator's rows.
	// capture: a coefficient that cannot be referenced leaves the value missing.
	gen se_tw_gap_`var' = .
	gen se_tw_base_`var' = .
	foreach w of local levels {
		cap replace se_tw_base_`var' = _se[`w'.worker_id] if worker_id == `w'
		cap replace se_tw_gap_`var' = _se[`w'.worker_id#1.pre_black] if worker_id == `w'
	}

	// Mean absorbed rotationgroup effect, carried as a constant in lincom
	// E[Y|inv,r] = invFE + invFE*black + g*E[X_i]
	predict xbd, xbd
	predict xb, xb
	gen d = xbd - xb
	sum d
	local dbar = `r(mean)'

	// Standard errors of the aggregate rates (w_: main effect only, b_: plus interaction)
	gen w_se_tw_fe_`var' = .
	gen b_se_tw_fe_`var' = .

	qui {
		foreach w of local levels {

			// worker_id 1 gets no main-effect term in the combination
			// (presumably the omitted base investigator -- confirm)
			local combo "_cons"
			if `w' != 1 {
				local combo "`combo' + _b[`w'.worker_id]"
			}

			// white children
			lincom `combo' + `dbar'
			replace w_se_tw_fe_`var' = `r(se)' if worker_id == `w'

			// black children
			lincom `combo' + _b[`w'.worker_id#1.pre_black] + `dbar'
			replace b_se_tw_fe_`var' = `r(se)' if worker_id == `w'
		}

		drop xbd xb d
	}
}
*Save dataset:
// One row per non-missing investigator id
keep worker_id se_* b_* w_*
drop if worker_id == .
duplicates drop worker_id, force

save "${tmpd}inv_se_tw.dta", replace
restore 


**************************
**(6) SES TW CLUSTERED AT THE CHILD AND ROTATION LEVELS 
**************************
// Work on a copy of the case-level data currently in memory
preserve

// Distinct investigator ids, stored in the local `levels'
levelsof worker_id, local(levels)

// One regression per outcome, two-way clustering on rotationgroup and vicid
foreach var in fc inv6m {

	// fc uses the full sample; other outcomes only cases with fc==0
	if "`var'"=="fc" {
		local insample ""
	}
	else {
		local insample "if fc==0"
	}
	reghdfe `var' i.worker_id i.worker_id#i.pre_black `insample', ///
		absorb(rotationgroup) cluster(rotationgroup vicid) resid

	// Coefficient-level standard errors, copied onto each investigator's rows.
	// capture: a coefficient that cannot be referenced leaves the value missing.
	gen se_twrot_gap_`var' = .
	gen se_twrot_base_`var' = .
	foreach w of local levels {
		cap replace se_twrot_base_`var' = _se[`w'.worker_id] if worker_id == `w'
		cap replace se_twrot_gap_`var' = _se[`w'.worker_id#1.pre_black] if worker_id == `w'
	}

	// Mean absorbed rotationgroup effect, carried as a constant in lincom
	// E[Y|inv,r] = invFE + invFE*black + g*E[X_i]
	predict xbd, xbd
	predict xb, xb
	gen d = xbd - xb
	sum d
	local dbar = `r(mean)'

	// Standard errors of the aggregate rates (w_: main effect only, b_: plus interaction)
	gen w_se_twrot_fe_`var' = .
	gen b_se_twrot_fe_`var' = .

	qui {
		foreach w of local levels {

			// worker_id 1 gets no main-effect term in the combination
			// (presumably the omitted base investigator -- confirm)
			local combo "_cons"
			if `w' != 1 {
				local combo "`combo' + _b[`w'.worker_id]"
			}

			// white children
			lincom `combo' + `dbar'
			replace w_se_twrot_fe_`var' = `r(se)' if worker_id == `w'

			// black children
			lincom `combo' + _b[`w'.worker_id#1.pre_black] + `dbar'
			replace b_se_twrot_fe_`var' = `r(se)' if worker_id == `w'
		}

		drop xbd xb d
	}
}

*Save dataset:
// One row per non-missing investigator id
keep worker_id se_* b_* w_*
drop if worker_id == .
duplicates drop worker_id, force

save "${tmpd}inv_se_twrot.dta", replace
restore 


**************************
**(7) SES CLUSTERED AT THE ROTATION LEVEL
**************************
// Work on a copy of the case-level data currently in memory
preserve

// Distinct investigator ids, stored in the local `levels'
levelsof worker_id, local(levels)

// One regression per outcome, standard errors clustered on rotationgroup
foreach var in fc inv6m {

	// fc uses the full sample; other outcomes only cases with fc==0
	if "`var'"=="fc" {
		local insample ""
	}
	else {
		local insample "if fc==0"
	}
	reghdfe `var' i.worker_id i.worker_id#i.pre_black `insample', ///
		absorb(rotationgroup) cluster(rotationgroup) resid

	// Coefficient-level standard errors, copied onto each investigator's rows.
	// capture: a coefficient that cannot be referenced leaves the value missing.
	gen se_rot_gap_`var' = .
	gen se_rot_base_`var' = .
	foreach w of local levels {
		cap replace se_rot_base_`var' = _se[`w'.worker_id] if worker_id == `w'
		cap replace se_rot_gap_`var' = _se[`w'.worker_id#1.pre_black] if worker_id == `w'
	}

	// Mean absorbed rotationgroup effect, carried as a constant in lincom
	// E[Y|inv,r] = invFE + invFE*black + g*E[X_i]
	predict xbd, xbd
	predict xb, xb
	gen d = xbd - xb
	sum d
	local dbar = `r(mean)'

	// Standard errors of the aggregate rates (w_: main effect only, b_: plus interaction)
	gen w_se_rot_fe_`var' = .
	gen b_se_rot_fe_`var' = .

	qui {
		foreach w of local levels {

			// worker_id 1 gets no main-effect term in the combination
			// (presumably the omitted base investigator -- confirm)
			local combo "_cons"
			if `w' != 1 {
				local combo "`combo' + _b[`w'.worker_id]"
			}

			// white children
			lincom `combo' + `dbar'
			replace w_se_rot_fe_`var' = `r(se)' if worker_id == `w'

			// black children
			lincom `combo' + _b[`w'.worker_id#1.pre_black] + `dbar'
			replace b_se_rot_fe_`var' = `r(se)' if worker_id == `w'
		}

		drop xbd xb d
	}
}

*Save dataset:
// One row per non-missing investigator id
keep worker_id se_* b_* w_*
drop if worker_id == .
duplicates drop worker_id, force

save "${tmpd}inv_se_rot.dta", replace
restore 


**************************
**(8) GENERATE MAIN INVESTIGATOR PARAMETER DATASET 
**************************
//Start with regression-adjusted investigator placement and maltreatment rates,
//then merge with standard errors clustered at various levels//
use "${tmpd}inv_rates.dta", clear 

// Attach the aggregate-rate standard errors from each SE file in turn.
// The tag names both the file (inv_<tag>.dta) and its variables
// (b_<tag>_fe_* / w_<tag>_fe_*). keep(1 3) retains every investigator in
// inv_rates whether or not a match exists in the SE file.
foreach tag in se se_worker se_tw se_twrot se_rot {
	merge 1:1 worker_id using "${tmpd}inv_`tag'.dta", ///
		keepusing(b_`tag'_fe_fc b_`tag'_fe_inv6m w_`tag'_fe_fc w_`tag'_fe_inv6m) ///
		keep(1 3)
	// _merge must be gone before the next merge can create it again
	cap drop _merge 
}

save "${cleand}inv_rates_se_all.dta", replace 


**************************
**(9) MERGE TO INVESTIGATOR COVARIATES AND SAVE FINAL DATASET
**************************	
	// Load the investigator-level rates with all standard-error variants
	use "${cleand}inv_rates_se_all.dta", clear
	count 
	local n_inv = r(N)
	
	*Merge to investigator covariates
	cap drop _merge 
	merge 1:1 worker_id using "${tmpd}inv_covariates.dta", keep(1 3)
	
	// A 1:1 merge with keep(1 3) must return exactly the master rows
	assert _N == `n_inv'
	
	// Bound the regression-adjusted rates to the unit interval.
	// Stata orders missing values above every number, so the upper bound
	// must exclude them explicitly; otherwise a missing rate would become 1.
	foreach x in w_fe_fc b_fe_fc w_fe_inv6m b_fe_inv6m {
		replace `x'=0 if `x'<0 
		replace `x'=1 if `x'>1 & !missing(`x')
	}
	
	// Flip the fc rates to their complement (1 - rate)
	// NOTE(review): presumably converts placement rates into leave-at-home rates -- confirm
	foreach x in w_fe_fc b_fe_fc {
		replace `x' = 1 - `x'
	}	
	
	save "${cleand}limited_inv_analysis_sample.dta", replace